--- title: "Bayesian Mediation Analysis" author: "Qingzhao Yu" output: pdf_document: keep_tex: true date: "`r Sys.Date()`" documentclass: article classoption: letter --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo=TRUE,tidy=T, error=T, message=F, warning=F) ``` WinBUGS uses Gibb's sampler to draw sampels from the posterior distributions of interests. First we need to download WinBUGS form the website: https://www.mrc-bsu.cam.ac.uk/software/bugs/the-bugs-project-winbugs/ and unpack the package. We use the R package R2WinBUGS to call WinBUGS from R and perform the MCMC sampling from the posterior distributions of parameters of interests. We first install the package in R (`install.packages("R2WinBUGS")`) and load it. ```{r} #install.packages("R2WinBUGS") library(R2WinBUGS) ``` ###Continuous outcome and continuous mediator The following code generates a continuous predictor x, a continuous third variable m, and a continuous outcome y: ```{r} set.seed(1) N=100 x=rnorm(N,0,1) e1=rnorm(N,0,1) e2=rnorm(N,0,1) alpha=0.98 beta=1.96 c=1.28 M=alpha*x+e1 y=c*x+beta*M+e2 ``` From the data generation mechanism, we know that the true indirect effect of $M$ is $1.96\times 0.98=1.92$ and the direct effect is $1.28$. #### Method 1 The following codes use the cp method to estimate indirect effects. (The method is the same for countinuous or binary exposure variables). ```{r} data1<- list (x=x,M=M,y=y,N=N) inits<- function(){ list(alpha=0,c=0,beta=0,prec1=0.5,prec2=0.5) } med1.1<- bugs(data1, inits, model.file = "O:/My Documents/My Research/Research/Bayesian Mediation Analysis/Winbugs/mediation_c_c_c_1.txt", parameters = c("alpha","c","beta","var1","var2", "IE","DE"), n.chains = 1, n.iter = 11000,n.burnin=1000, n.thin=1, bugs.directory = "C:/Program Files/winbugs14_full_patched/WinBUGS14", debug = F) med1.1 plot(med1.1) head(med1.1$sims.matrix) ``` ```{r, include=FALSE} postscript(file="C:/Users/qyu/Desktop/book/book writing/chapters/chapter11/figures/hist_ccc1.eps") par(mfrow=c(1,2)) hist(med1.1$sims.matrix[,"IE"],xlab="Indirect Effect",main="Posterior Distribution of IE") hist(med1.1$sims.matrix[,"DE"],xlab="Direct Effect",main="Posterior Distribution of DE") dev.off() ``` ####Method 2 This is the method for partial differentiation based fixed changes of $\Delta x$ and $\Delta m$ that are decided by the range of x and m. ```{r} deltax=min(1,diff(range(x))/10) deltam=min(1,diff(range(M))/10) data1<- list(x=x,M=M,y=y,N=N,deltax=deltax,deltam=deltam) inits<- function(){ list(alpha=0,c=0,beta=0,prec1=0.5,prec2=0.5) } med1.2<- bugs(data1, inits, model.file = "O:/My Documents/My Research/Research/Bayesian Mediation Analysis/Winbugs/mediation_c_c_c_2_2.txt", parameters = c("alpha","c","beta","var1","var2", "ie","de","te"), n.chains = 1, n.iter = 11000,n.burnin=1000, bugs.directory = "O:/Current work/Program/winbugs143_unrestricted/winbugs14_full_patched/WinBUGS14/", debug = F) mean(med1.2$mean$alpha) mean(med1.2$mean$beta) mean(med1.2$mean$c) mean(med1.2$mean$ie) mean(med1.2$mean$de) mean(med1.2$mean$te) ``` Next we check the method in finding nonlinear third-variable effect. The following codes simulate a third-variable that is quadratically related with the exposure variable: ```{r} M.2=alpha*x*x+e1 y.2=c*x+beta*M.2+e2 ``` The model mediation_c_c_c_2_2 is changed based on mediation_c_c_c_2 where the model for M is quadratic. ```{r} deltax=min(0.01,diff(range(x))/100) deltam=min(0.01,diff(range(M.2))/100) data1<- list(x=x,M=M.2,y=y.2,N=N,deltax=deltax,deltam=deltam) inits<- function(){ list(alpha=0,c=0,beta=0,prec1=0.5,prec2=0.5) } med1.2.2<- bugs(data1, inits, model.file = "O:/My Documents/My Research/Research/Bayesian Mediation Analysis/Winbugs/mediation_c_c_c_2_2.txt", parameters = c("alpha","c","beta","var1","var2", "ie","de","te"), n.chains = 1, n.iter = 11000,n.burnin=1000, bugs.directory = "O:/Current work/Program/winbugs143_unrestricted/winbugs14_full_patched/WinBUGS14/", debug = F) mean(med1.2.2$mean$alpha) mean(med1.2.2$mean$beta) mean(med1.2.2$mean$c) mean(med1.2.2$mean$ie) mean(med1.2.2$mean$de) mean(med1.2.2$mean$te) ``` ```{r, include=FALSE} postscript(file="C:/Users/qyu/Desktop/book/book writing/chapters/chapter11/figures/ccc2_2.eps") par(mfrow=c(1,1)) o1=order(x) plot(x[o1],med1.2.2$mean$ie[o1],xlab="x",ylab="ie") dev.off() ``` The following code generate the same data but binary exposure ```{r} set.seed(1) N=100 x2=rnorm(N,0,1) x2=ifelse(x2<0,0,1) alpha=0.98 beta=1.96 c=1.28 M2=alpha*x2+e1 y2=c*x2+beta*M2+e2 ``` When the exposure is 0 or 1: ```{r} deltax=1 deltam=diff(range(M2))/10 data1<- list ( x=x2,M=M2,y=y2,N=N,deltax=deltax,deltam=deltam) inits<- function(){ list(alpha=0,c=0,beta=0,prec1=0.5,prec2=0.5) } med1.2<- bugs(data1, inits, model.file = "O:/My Documents/My Research/Research/Bayesian Mediation Analysis/Winbugs/mediation_c_c_b_2.txt", parameters = c("alpha","c","beta","var1","var2", "ie","de","te"), n.chains = 1, n.iter = 11000,n.burnin=1000, bugs.directory = "O:/Current work/Program/winbugs143_unrestricted/winbugs14_full_patched/WinBUGS14/", debug = F) mean(med1.2$mean$alpha) mean(med1.2$mean$beta) mean(med1.2$mean$c) mean(med1.2$mean$ie) mean(med1.2$mean$de) mean(med1.2$mean$te) ``` ####Method 2 version 2 The following codes use the differences of the observed x as $\Delta x$ and $\Delta M$ is defined as $E(M|x+\Delta x)-E(M|x)$ for continuous x. The method is very sensitive when the diff of x is small. ```{r} o1=order(x) x1=x[o1] M1=M[o1] y1=y[o1] deltax1=c(diff(x1),mean(diff(x1))) data1<- list ( x=x1,M=M1,y=y1,N=N,deltax=deltax1) inits<- function(){ list(alpha=0,c=0,beta=0,prec1=0.5,prec2=0.5) } med1.3<- bugs(data1, inits, model.file = "O:/My Documents/My Research/Research/Bayesian Mediation Analysis/Winbugs/mediation_con_con3.txt", parameters = c("alpha","c","beta","var1","var2", "ie","de","te"), n.chains = 1, n.iter = 11000,n.burnin=1000, bugs.directory = "O:/Current work/Program/winbugs143_unrestricted/winbugs14_full_patched/WinBUGS14/", debug = F) mean(med1.3$mean$alpha) mean(med1.3$mean$beta) mean(med1.3$mean$c) mean(med1.3$mean$ie) mean(med1.3$mean$de) mean(med1.3$mean$te) ``` ####Method 3 Use the marginal distribution of M ```{r} deltax=min(0.05,diff(range(x))/100) data1<- list ( x=x,M=M,y=y,N=N,deltax=deltax) inits<- function(){ list(alpha=0,c=0,beta=0,prec1=0.5,prec2=0.5) } med1.4<- bugs(data1, inits, model.file = "O:/My Documents/My Research/Research/Bayesian Mediation Analysis/Winbugs/mediation2_c_c_c_3.txt", parameters = c("alpha","c","beta","var1","var2", "ie","de","te"), n.chains = 1, n.iter = 15000,n.burnin=5000,n.thin=1, bugs.directory = "O:/Current work/Program/winbugs143_unrestricted/winbugs14_full_patched/WinBUGS14/", debug = F) mean(med1.4$mean$alpha) mean(med1.4$mean$beta) mean(med1.4$mean$c) mean(med1.4$mean$ie) mean(med1.4$mean$de) mean(med1.4$mean$te) plot(x[o1],med1.4$mean$ie[o1]) ``` The nonlinear case: ```{r} deltax=min(0.01,diff(range(x))/100) data1<- list (x=x,M=M.2,y=y.2,N=N,deltax=deltax) inits<- function(){ list(alpha=0,c=0,beta=0,prec1=0.5,prec2=0.5) } med1.4.2<- bugs(data1, inits, model.file = "O:/My Documents/My Research/Research/Bayesian Mediation Analysis/Winbugs/mediation2_c_c_c_3_2.txt", parameters = c("alpha","c","beta","var1","var2", "ie","de","te"), n.chains = 1, n.iter = 15000,n.burnin=5000,n.thin=1, bugs.directory = "O:/Current work/Program/winbugs143_unrestricted/winbugs14_full_patched/WinBUGS14/", debug = F) mean(med1.4.2$mean$alpha) mean(med1.4.2$mean$beta) mean(med1.4.2$mean$c) mean(med1.4.2$mean$ie) mean(med1.4.2$mean$de) mean(med1.4.2$mean$te) plot(x[o1],med1.4.2$mean$ie[o1]) ``` ```{r, include=FALSE} postscript(file="C:/Users/qyu/Desktop/book/book writing/chapters/chapter11/figures/ccc3_2.eps") par(mfrow=c(1,2)) o1=order(x) plot(x[o1],med1.2.2$mean$ie[o1],xlab="x",ylab="ie",main="Method 2",ylim=c(-10,10)) plot(x[o1],med1.4.2$mean$ie[o1],xlab="x",ylab="ie",main="Method 3",ylim=c(-10,10)) dev.off() ``` When the exposure is binary, taking value 1 or 0 ```{r} data1<- list ( x=x2,M=M2,y=y2,N=N,deltax=1) inits<- function(){ list(alpha=0,c=0,beta=0,prec1=0.5,prec2=0.5) } med1.4<- bugs(data1, inits, model.file = "O:/My Documents/My Research/Research/Bayesian Mediation Analysis/Winbugs/mediation_c_c_b_3.txt", parameters = c("alpha","c","beta","var1","var2", "ie","de","te"), n.chains = 1, n.iter = 15000,n.burnin=5000, bugs.directory = "O:/Current work/Program/winbugs143_unrestricted/winbugs14_full_patched/WinBUGS14/", debug = F) mean(med1.4$mean$alpha) mean(med1.4$mean$beta) mean(med1.4$mean$c) mean(med1.4$mean$ie) mean(med1.4$mean$de) mean(med1.4$mean$te) ``` ###Binary outcome and continuous mediator ####Method 1 Use $a\times b$, when the outcome is binary, the exposure can be either binary or continuous. Note that all the estimated effects are interpreted in terms of $logit(p(Y=1))$ but not in $Y$ directly. ```{r} mu_y=c*x+beta*M y1.2<-rbinom(100,1,exp(mu_y)/(1+exp(mu_y))) data1<- list ( x=x,M=M,y=y1.2,N=N) inits<- function(){ list(alpha=0,c=0,beta=0,prec1=0.5) } med2.1<- bugs(data1, inits, model.file = "O:/My Documents/My Research/Research/Bayesian Mediation Analysis/Winbugs/mediation_b_c_c.txt", parameters = c("alpha","c","beta","var1", "IE","DE"), n.chains = 1, n.iter = 1500,n.burnin=500, bugs.directory = "O:/Current work/Program/winbugs143_unrestricted/winbugs14_full_patched/WinBUGS14/", debug = F) med2.1 plot(med2.1) head(med2.1$sims.matrix) ``` ####Method 2: The partial differential method for continuous exposure #####Method 2 This is the method for partial differentiation based fixed changes of $\Delta x$ and $\Delta m$ that are decided by the range of x and m. In the code, the effects is in terms of $Y$ but not $logit(P(Y=1))$. If it is the $logit(P(Y=1))$ that is of interest, change the mu_Y[i] in the model to logit(mu_Y[i]) ```{r} deltax=diff(range(x))/10 deltam=diff(range(M))/10 data1<- list ( x=x,M=M,y=y1.2,N=N,deltax=deltax,deltam=deltam) inits<- function(){ list(alpha=0,c=0,beta=0,prec1=0.5) } med2.2<- bugs(data1, inits, model.file = "O:/My Documents/My Research/Research/Bayesian Mediation Analysis/Winbugs/mediation_b_c_c_2.txt", parameters = c("alpha","c","beta","var1", "ie","de","te"), n.chains = 1, n.iter = 11000,n.burnin=1000, bugs.directory = "O:/Current work/Program/winbugs143_unrestricted/winbugs14_full_patched/WinBUGS14/", debug = F) mean(med2.2$mean$alpha) mean(med2.2$mean$beta) mean(med2.2$mean$c) mean(med2.2$mean$ie) mean(med2.2$mean$de) mean(med2.2$mean$te) ``` #####For binary exposures The following code generate the same data but binary exposure ```{r} mu_y_2=c*x+beta*M y2.2<-rbinom(100,1,exp(mu_y_2)/(1+exp(mu_y_2))) ``` When the exposure is 0 or 1. The effect is in terms of $Y$. ```{r} deltax=1 deltam=diff(range(M2))/10 data1<- list ( x=x2,M=M2,y=y2.2,N=N,deltax=deltax,deltam=deltam) inits<- function(){list(alpha=0,c=0,beta=0,prec1=0.5)} med2.2.2<- bugs(data1, inits, model.file = "O:/My Documents/My Research/Research/Bayesian Mediation Analysis/Winbugs/mediation_b_c_b_2.txt", parameters = c("alpha","c","beta","var1", "ie","de","te"), n.chains = 1, n.iter = 11000,n.burnin=1000, bugs.directory = "O:/Current work/Program/winbugs143_unrestricted/winbugs14_full_patched/WinBUGS14/", debug = F) mean(med2.2.2$mean$alpha) mean(med2.2.2$mean$beta) mean(med2.2.2$mean$c) mean(med2.2.2$mean$ie) mean(med2.2.2$mean$de) mean(med2.2.2$mean$te) ``` ####Method 3 Use the marginal distribution of M ```{r} data1<- list ( x=x,M=M,y=y1.2,N=N,deltax=deltax) inits<- function(){list(alpha=0,c=0,beta=0,prec1=0.5)} med2.3<- bugs(data1, inits, model.file = "O:/My Documents/My Research/Research/Bayesian Mediation Analysis/Winbugs/mediation_b_c_c_3.txt", parameters = c("alpha","c","beta","var1", "ie","de","te"), n.chains = 1, n.iter = 15000,n.burnin=5000, bugs.directory = "O:/Current work/Program/winbugs143_unrestricted/winbugs14_full_patched/WinBUGS14/", debug = F) mean(med2.3$mean$alpha) mean(med2.3$mean$beta) mean(med2.3$mean$c) mean(med2.3$mean$ie) mean(med2.3$mean$de) mean(med2.3$mean$te) ``` When the exposure is binary, taking value 1 or 0 ```{r} data1<- list ( x=x2,M=M2,y=y2.2,N=N,deltax=1) inits<- function(){list(alpha=0,c=0,beta=0,prec1=0.5)} med2.3.2<- bugs(data1, inits, model.file = "O:/My Documents/My Research/Research/Bayesian Mediation Analysis/Winbugs/mediation_b_c_b_3.txt", parameters = c("alpha","c","beta","var1", "ie","de","te"), n.chains = 1, n.iter = 15000,n.burnin=5000, bugs.directory = "O:/Current work/Program/winbugs143_unrestricted/winbugs14_full_patched/WinBUGS14/", debug = F) mean(med2.3.2$mean$alpha) mean(med2.3.2$mean$beta) mean(med2.3.2$mean$c) mean(med2.3.2$mean$ie) mean(med2.3.2$mean$de) mean(med2.3.2$mean$te) ``` In summary, the main change with countinuous response is how to set up y: need to put a link on (my_y) and use a different random component. ###When the response variable is continuous In the model setting, we set log(mu_y[i]) <- c*x[i]+beta*M[i] y[i] ~ dpois(mu_y[i]) In general, for different outcome, we change the link function and the random term. To estimate the third-variable effects, the effect can be in terms of the original $Y$ (use mu_y) or the transformed outcome (e.g. use link(mu_y), where link is the link function). ###When the mediator is binary ####Method 1 the usual method 1 does not work since we cannot use alpha directly. ####Method 2 generate the data: ```{r} mu_m=alpha*x M3=rbinom(100,1,exp(mu_m)/(1+exp(mu_m))) y2.1=c*x+beta*M3+e2 ``` ####Method 2 The product of partial difference still wookrs. The only change is in the model: need to set up a link function and random term for the third variable. ```{r} deltax=diff(range(x))/10 deltam=1 data1<- list ( x=x,M=M3,y=y2.1,N=N,deltax=deltax,deltam=deltam) inits<- function(){list(alpha=0,c=0,beta=0,prec2=0.5)} med3.2<- bugs(data1, inits, model.file = "O:/My Documents/My Research/Research/Bayesian Mediation Analysis/Winbugs/mediation_c_b_c_2.txt", parameters = c("alpha","c","beta","var2", "ie","de","te"), n.chains = 1, n.iter = 11000,n.burnin=1000, bugs.directory = "O:/Current work/Program/winbugs143_unrestricted/winbugs14_full_patched/WinBUGS14/", debug = F) mean(med3.2$mean$alpha) mean(med3.2$mean$beta) mean(med3.2$mean$c) mean(med3.2$mean$ie) mean(med3.2$mean$de) mean(med3.2$mean$te) plot(x[o1],med3.2$mean$ie[o1]) ``` ####Method 3 Use the marginal distribution of M ```{r} data1<- list ( x=x,M=M3,y=y2.1,N=N,deltax=deltax) inits<- function(){list(alpha=0,c=0,beta=0,prec1=0.5)} med3.3<- bugs(data1, inits, model.file = "O:/My Documents/My Research/Research/Bayesian Mediation Analysis/Winbugs/mediation2_c_b_c_3.txt", parameters = c("alpha","c","beta","var2", "ie","de","te"), n.chains = 1, n.iter = 15000,n.burnin=5000, bugs.directory = "O:/Current work/Program/winbugs143_unrestricted/winbugs14_full_patched/WinBUGS14/", debug = F) mean(med3.3$mean$alpha) mean(med3.3$mean$beta) mean(med3.3$mean$c) mean(med3.3$mean$ie) mean(med3.3$mean$de) mean(med3.3$mean$te) plot(x[o1],med3.3$mean$ie[o1]) ``` ```{r, include=FALSE} postscript(file="C:/Users/qyu/Desktop/book/book writing/chapters/chapter11/figures/cbc.eps") par(mfrow=c(1,2)) plot(x[o1],med3.2$mean$ie[o1],xlab="x",ylab="ie",main="Method 2", ylim=c(0.15,0.45)) plot(x[o1],med3.3$mean$ie[o1],xlab="x",ylab="ie",main="Method 3", ylim=c(0.15,0.45)) dev.off() ``` In summary, when the mediator is binary, the main change is to set up m: need to put a link on (mu_M) and use a different random component. ##When the mediator is binary In the model setting, we set log(mu_M[i]) <- c*x[i]+beta*M[i] y[i] ~ dpois(mu_y[i]) In general, for different outcome, we change the link function and the random term. ##When the mediator is categorical generate the data: ```{r} set.seed(2) alpha1=0.5 alpha2=0.8 beta1=1 beta2=1.2 mu_m1=alpha1*x mu_m2=alpha2*x p=cbind(1,exp(mu_m1),exp(mu_m2)) M4=t(apply(p,1,rmultinom,n=1,size=1)) y4=c*x+beta1*M4[,2]+beta2*M4[,3]+e2 M4.1=as.vector(M4%*%(1:3)) ``` #### Method 1 The following codes use the cp method to estimate indirect effects. The result is not correct. ```{r} data1<- list (x=x,M=M4.1,y=y4,N=N) inits<- function(){ list(alpha1=0,alpha2=0,c=0,beta1=0,beta2=0,prec2=0.5) } med4.1<- bugs(data1, inits, model.file = "O:/My Documents/My Research/Research/Bayesian Mediation Analysis/Winbugs/mediation_c_cat_c_1.txt", parameters = c("alpha1","alpha2","c","beta1","beta2","var2", "ie","DE"), n.chains = 1, n.iter = 11000,n.burnin=1000, bugs.directory = "O:/Current work/Program/winbugs143_unrestricted/winbugs14_full_patched/WinBUGS14/", debug = F) mean(med4.1$mean$ie) mean(med4.1$mean$DE) mean(med4.1$mean$alpha1) mean(med4.1$mean$alpha2) mean(med4.1$mean$beta1) mean(med4.1$mean$beta2) mean(med4.1$mean$c) plot(x[o1],med4.1$mean$ie[o1]) ``` #### Method 2 The following codes use the product of differences method ```{r} deltax=min(0.01,diff(range(x))/100) data1<- list (x=x,M=M4.1,y=y4,N=N,deltax=deltax,deltam1=1,deltam2=1) inits<- function(){ list(alpha1=0,alpha2=0,c=0,beta1=0,beta2=0,prec2=0.5) } med4.2<- bugs(data1, inits, model.file = "O:/My Documents/My Research/Research/Bayesian Mediation Analysis/Winbugs/mediation_c_cat_c_2.txt", parameters = c("alpha1","alpha2","c","beta1","beta2","var2","ie1","ie2", "ie","de"), n.chains = 1, n.iter = 11000,n.burnin=1000, bugs.directory = "O:/Current work/Program/winbugs143_unrestricted/winbugs14_full_patched/WinBUGS14/", debug = F) mean(med4.2$mean$ie) mean(med4.2$mean$de) mean(med4.2$mean$alpha1) mean(med4.2$mean$alpha2) mean(med4.2$mean$beta1) mean(med4.2$mean$beta2) mean(med4.2$mean$c) plot(x[o1],med4.2$mean$ie[o1],ylim=c(-0.05,0.16),type="l") lines(x[o1],med4.2$mean$ie1[o1],lty=2) lines(x[o1],med4.2$mean$ie2[o1],lty=3) ``` #### Method 3 The following codes use the product of differences method ```{r} deltax=min(0.1,diff(range(x))/100) data1<- list (x=x,M=M4.1,y=y4,N=N,deltax=deltax) inits<- function(){ list(alpha1=0,alpha2=0,c=0,beta1=0,beta2=0,prec2=0.5) } med4.3<- bugs(data1, inits, model.file = "O:/My Documents/My Research/Research/Bayesian Mediation Analysis/Winbugs/mediation2_c_cat_c_3.txt", parameters = c("alpha1","alpha2","c","beta1","beta2","var2", "ie","de"), n.chains = 1, n.iter = 11000,n.burnin=1000,n.thin=1, bugs.directory = "O:/Current work/Program/winbugs143_unrestricted/winbugs14_full_patched/WinBUGS14/", debug = F) mean(med4.3$mean$ie) mean(med4.3$mean$de) mean(med4.3$mean$alpha1) mean(med4.3$mean$alpha2) mean(med4.3$mean$beta1) mean(med4.3$mean$beta2) mean(med4.3$mean$c) ``` ```{r,include=F} postscript(file="C:/Users/qyu/Desktop/book/book writing/chapters/chapter11/figures/cat_ie.eps") plot(x[o1],med4.3$mean$ie[o1],ylim=c(-0.05,0.25),ylab="IE",xlab="x") lines(x[o1],med4.2$mean$ie[o1]) lines(x[o1],med4.2$mean$ie1[o1],lty=2) lines(x[o1],med4.2$mean$ie2[o1],lty=3) dev.off() ```